In [8]:
import numpy as np
import matplotlib.pylab as plt
import numpy.random as npr
%matplotlib inline

Setting:

  • We have a set of datapoints $\mathcal{D} = \{x_1,x_2,\dots,x_m \}$.
  • Some of them we know to be similar $\mathcal{S} = {(x_i,x_j) |

In [41]:
from sklearn import datasets
X,Y = datasets.make_moons(100,noise=0.05)


/Users/joshuafass/anaconda/lib/python3.4/site-packages/sklearn/datasets/samples_generator.py:612: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = np.hstack([np.zeros(n_samples_in, dtype=np.intp),

In [42]:
plt.scatter(X[:,0],X[:,1],c=Y)


Out[42]:
<matplotlib.collections.PathCollection at 0x111688160>

In [43]:
D = X

In [44]:
S = set()
S.add(1)
S


Out[44]:
{1}

In [45]:
#S = set()
S = np.zeros((len(X),len(X)))
for i in range(len(X)):
    for j in range(i):
        if Y[i]==Y[j]:
            #S.add((i,j))
            S[i,j] = 1

In [46]:
plt.imshow(S,interpolation='none')


Out[46]:
<matplotlib.image.AxesImage at 0x111744fd0>

In [46]:


In [47]:
# learning diagonal metric
def g(A,S,D):
    ''' S given as a matrix?'''
    def d(x,y):
        return np.sqrt((x-y).T.dot(A).dot(x-y))
    
    first_comp = 0
    second_comp = 0
    for i in range(len(S)):
        for j in range(i):
            d_xy = d(D[i],D[j])
            if S[i,j]==1:
                first_comp += d_xy**2
            second_comp += d_xy
    return first_comp - np.log(second_comp)

In [48]:
def potential(A):
    return g(np.diag(A),S,D)

In [61]:
As = npr.rand(1000,2)*0.5

In [54]:
%timeit potential(As[0])


10 loops, best of 3: 37.7 ms per loop

In [62]:
ps = [potential(A) for A in As]

In [63]:
plt.scatter(As[:,0],As[:,1],c=ps,linewidths=0)
plt.colorbar()


Out[63]:
<matplotlib.colorbar.Colorbar at 0x111e7eba8>

In [64]:
np.min(ps),np.max(ps)


Out[64]:
(16.728475121361406, 1501.296383120022)

In [65]:
np.argmin(ps)


Out[65]:
925

In [66]:
As[np.argmin(ps)]


Out[66]:
array([ 0.00142986,  0.03807666])

In [ ]: